The greatest value of a picture is when it forces us to notice what we never expected to see.
seaborn plots
Many, many more in these categories - these are just our focus for today!
import pandas as pd

# Read mexico.csv from the course URL into a DataFrame and preview it.
mx_csv = "http://personal.tcu.edu/kylewalker/mexico.csv"
mx = pd.read_csv(mx_csv)
mx.head()

# How about sorting our data?
# Sort by the 'gdp08' column, largest values first, and preview the result.
mx_sorted = mx.sort_values(by = 'gdp08', ascending = False)
mx_sorted.head()

# IPython magic carried over from the original notebook. It is not valid
# Python syntax, so it is kept as a comment; re-enable it inside Jupyter.
# %matplotlib inline
import seaborn as sns

# Bar chart of 'gdp08' by 'name' using the pandas plotting API.
mx.plot(x= 'name', y = 'gdp08', kind = 'bar')

# seaborn
# Horizontal bar plot drawn from the sorted frame, with the whitegrid style.
sns.set(style = 'whitegrid')
sns.barplot(x = 'gdp08', y = 'name', data = mx_sorted)

# seaborn
# Same variables shown as a strip (dot) plot.
sns.stripplot(x = 'gdp08', y = 'name', data = mx_sorted)

# Select the row(s) named 'Zacatecas', drop the listed columns, and squeeze
# the result down to a Series (assumes exactly one matching row -- confirm).
zac = mx[mx.name == 'Zacatecas'].drop(['name', 'FID', 'gdp08', 'mus09'], axis = 1).squeeze()
zac.name = 'Zacatecas'
# Pie chart of the remaining Zacatecas columns.
zac.plot(kind = 'pie', figsize = (6, 6))

# pandas
# Read hs_drop.csv, order it by 'year', and make 'year' the index.
hs_drop = pd.read_csv('http://personal.tcu.edu/kylewalker/data/hs_drop.csv')
hs_drop.sort_values('year', inplace = True)
hs_drop.set_index('year', inplace = True)
hs_drop.plot() # pandas plotting defaults to line charts, infers x from index

# seaborn: pointplot and factorplot
# reset_index must be its own statement: when fused onto the commented line
# above it never runs, and pd.melt then cannot find 'year' as a column.
hs_drop.reset_index(inplace = True)

# Reshape wide -> long: one row per (year, gender) with the rate in
# 'percent_drop', taken from the 'm_rate' and 'f_rate' columns.
hs_long = pd.melt(hs_drop, id_vars = 'year',
                  value_vars = ['m_rate', 'f_rate'],
                  value_name = 'percent_drop', var_name = 'gender')
# We use catplot (the current name of factorplot) because it gives us greater
# control over the axes. factorplot and its `size` argument were renamed to
# catplot / `height` in seaborn 0.9; kind = 'point' keeps the old default.
chart = sns.catplot(data = hs_long, x = 'year',
                    y = 'percent_drop', hue = 'gender',
                    kind = 'point', height = 8)
# Rotate the x tick labels and show only every third one.
chart.set_xticklabels(rotation = 45, step = 3)

# seaborn, pandas
# Scatter plot of 'pri10' against 'mus09' with the pandas plotting API.
mx.plot(x = 'mus09', y = 'pri10', kind = 'scatter')

# seaborn: lmplot and regplot functions
sns.lmplot(data = mx, x = 'mus09', y = 'pri10')

# pandas: .corr()
mx['mus09'].corr(mx['pri10'])
# 0.41639990565936902  (the result)